## Replication files for "Do Online Ads Influence Vote Choice?"
## anselm.rink@gmail.com
## This code replicates Figure 1

rm(list=ls())
library(ri) 
library(foreign)
library(lme4)
library(arm)
library(rstan)
library(rstudioapi)
library(ggplot2)
library(gridExtra)

## Change wd as desired 

## DIRECTORY "BayesMC" MUST EXIST IN WORKING DIRECTORY
## STAN IS REQUIRED; SEE https://github.com/stan-dev/rstan/wiki/RStan-Getting-Started

setwd("")
data <- read.csv("data.csv") 

## Generate treatment indicator
treat <- 1-(data$control)
treat <- ifelse(treat == 1, T, F)


## Frequentist benchmark model
m1_ols <- lm(cdu16erst ~ treat, data=data)
summary(m1_ols)


## Bayesian multi-level model

## stan options

rstan_options(auto_write = TRUE)
options(mc.cores = parallel::detectCores())

## set up stan data

stan_dat <- list(N = length(treat), 
                 y = data$cdu16erst,
                 treat = ifelse(treat, 2, 1))

# set up modelling structure (change priors in code2 and data!!)

code2 <- "data {
     int<lower=0> N; 
int<lower=1,upper=2> treat1[N];
vector[N] y;
real<lower=-1,upper=1> prior_mu_advertisment;
real<lower=0,upper=1> prior_se_advertisment;
real<lower=-1,upper=1> prior_mu_noadvertisment;
real<lower=0,upper=100> prior_se_noadvertisment;
} 
parameters {
vector[2] a1;
real<lower=0,upper=100> sigma_y;
} 
transformed parameters {
vector[N] y_hat;
for (i in 1:N)
y_hat[i] <- a1[treat1[i]];
}
model{
a1[2] - a1[1] ~ normal(prior_mu_advertisment, prior_se_advertisment);
y ~ normal(y_hat, sigma_y);
}"

## structure Stan data

stan_dat2 <- list(N = length(treat), 
                  y = data$cdu16erst,
                  treat1 = ifelse(treat, 2, 1),
                  prior_mu_advertisment = 0.017, # mu prior
                  prior_se_advertisment = 0.011, # se prior
                  prior_mu_noadvertisment = 0, # mu prior
                  prior_se_noadvertisment= 0.011 # se prior
                  )

## set up vector of priors for sensitivity analysis

n <- 50 
priors <- seq(0, 0.05, length.out = n)
prior_se <- summary(m1_ols)$coefficients[2, 2]
i = 1

## loop through priors
## takes ~10 min

## DIRECTORY "BayesMC" MUST EXIST IN WORKING DIRECTORY

for (p in priors) {
  
  #the loop only changes the prior for the foreign treatment, mu
  
  stan_dat2$prior_mu_advertisment <- p 
  fit_s2 <- stan(model_code = code2, data = stan_dat2, iter = 1000, chains = 4)
  
  ## get sims
  
  fit_s2_sim <- extract(fit_s2)
  treat_sim2 <- fit_s2_sim$a1
  
  ## get treatment effect
  
  diffs2 <- treat_sim2[, 2] - treat_sim2[, 1]
  
  ## save as single csvs, produces a lot of files...
  
  write.csv(diffs2, paste0("BayesMC/diffs_", i, ".csv"), row.names = F)
  i = i + 1
}


## read diff csv files, plot

list_diffs <- lapply(1:n, function(i) read.csv(paste0("BayesMC/diffs_", i, ".csv")))

## to df

list_diffs <- sapply(list_diffs, data.frame)

## get mean, 0.025 / 0.975 quantiles

means <- sapply(list_diffs, mean)
mins <- sapply(list_diffs, quantile, probs = 0.025)
maxs <- sapply(list_diffs, quantile, probs = 0.975)

## mark bold priors in plot

priors_bold = c(0.008, summary(m1_ols)$coefficients[2, 1], 0.025, 0.05)

index <- sapply(priors_bold, function(x) {
  which.min(abs(seq(0, 0.05, length.out = n) - x))
  })

## to df for plot

df_plot <- data.frame(estimate = means, 
                      min = mins, 
                      max = maxs, 
                      Prior = seq(0, 0.05, length.out = n))

df_plot$Prior <- seq(0, 0.05, length.out = n)

df_plot$bold <- "0"
df_plot$bold[c(index)] <- "1"

## plot

p3 <- ggplot(data = df_plot, aes(y = estimate, x = Prior)) +
      theme_bw(base_size = 18) + xlab("") + theme(legend.position = "none") +
      ylab("")
p3 <- p3 + geom_hline(yintercept = 0, 
                      linetype = "dashed")

p3 <- p3 + geom_errorbar(aes(ymin= min, 
                             ymax= max, 
                             size = bold), 
                             width=0,
                             alpha = 1)

p3 <- p3 + scale_size_manual(values = c("0" = 0.25, "1" = 1))
p3 <- p3 + geom_point(aes(y = estimate, x = Prior), 
                      size=2, 
                      shape = 21,
                      fill="white") 

p3 <- p3 + theme(legend.title=element_blank())
p3 <- p3 + theme(panel.grid.major = element_blank(), 
                 panel.grid.minor = element_blank())
p3

df_ann <- data.frame(Prior =    c(0.008, 0.017, 0.025, 0.048), 
                     estimate = c(0.035, 0.04, 0.043, 0.01), 
                     lab = c("Bhatti et al.\n(Forthcoming)",
                             "Frequentist\nestimate",  
                             "Green et al.\n(2013)",
                             "Panagopoulos\nand Green\n(2008)"))

p3 <- p3 + geom_text(data = df_ann, aes(label = lab), size = 5) + 
  xlab("Prior on treatment effect") +
  ylab("Bayesian estimate")
p3


grid.arrange(p3,nrow=1)
pdf("bayes.pdf", width = 10, height = 5)
grid.arrange(p3,nrow=1)
dev.off() 

